ad = read.csv('Advertising_Updated.csv')
lm <- lm(Sales~., data=ad)
new.dat <- data.frame(TV=200, Radio=10, Newspaper=20)
predict(lm, newdata = new.dat, interval = "confidence")
       fit      lwr      upr
1 13.95637 13.60053 14.31221

Q.1

library(dplyr)
Registered S3 method overwritten by 'dplyr':
  method           from
  print.rowwise_df     

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(magrittr)
data = read.csv('KAG.csv', stringsAsFactors = FALSE)
head(data)
group_by(data, ad_id) %>% 
  summarise(., cpc = sum(CPC), imp = sum(Impressions)) %>% 
  filter(cpc < 1) %>% 
  arrange(., cpc, desc(imp))

The ad among ads with cpc = 0 that led to the most impressions is 1121094.

Q.2

group_by(data, campaign_id) %>% 
  summarise(., spent = sum(Spent), imp = sum(Impressions), ) %>% 
  mutate(., cpm=spent/imp) %>%
  arrange(., cpm)

Campaign 1178 was the least efficient on brand awareness on average.

Q.3

summarise(data, spent = sum(Spent), tc = 5*sum(CostPerConv_Total), ac = 50*sum(CostPerConv_Approved), ) %>% 
  mutate(., roas=(tc+ac)/spent) %>%
  arrange(., roas)

ROAS is 34.32.

library(ggplot2)
d = filter(data, interest==15 | interest==21 | interest==101) %>%
  mutate(., interest=factor(interest)) %>%
  group_by(., interest, gender) %>% 
  mutate(., roas=(CostPerConv_Total+CostPerConv_Approved)/Spent) %>%
  filter(!is.na(roas)) %>%
  arrange(., roas)
ggplot(d, aes(x = interest, y = roas, group = interest)) + 
  geom_boxplot() +
  labs(x="Interest ID", y = "ROAS")

filter(data, campaign_id == 1178) %>%
  mutate(., roas=(CostPerConv_Total+CostPerConv_Approved)/Spent, gender=factor(gender)) %>%
  filter(!is.na(roas)) %>%
  group_by(., gender) %>% 
  summarise(., mn = mean(roas), md = median(roas)) 
NA

Q.5

library(readr)
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
✓ tibble  2.1.3     ✓ stringr 1.4.0
✓ tidyr   1.0.2     ✓ forcats 0.4.0
✓ purrr   0.3.3     
── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
x tidyr::extract()   masks magrittr::extract()
x dplyr::filter()    masks stats::filter()
x dplyr::lag()       masks stats::lag()
x purrr::set_names() masks magrittr::set_names()
library(correlationfunnel)
══ Using correlationfunnel? ══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════
You might also be interested in applied data science training for business.
</> Learn more at - www.business-science.io </>
library(DataExplorer)
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     
Registered S3 methods overwritten by 'htmltools':
  method               from         
  print.html           tools:rstudio
  print.shiny.tag      tools:rstudio
  print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio
library(WVPlots)
library(ggthemes) 
library(ROCR)
Loading required package: gplots

Attaching package: ‘gplots’

The following object is masked from ‘package:stats’:

    lowess
library(caret)
Loading required package: lattice

Attaching package: ‘caret’

The following object is masked from ‘package:purrr’:

    lift
library(corrplot)
corrplot 0.84 loaded

a)

data = read.csv('advertising1.csv', stringsAsFactors = TRUE)
head(data)
ggplot(data, aes( y = Daily.Time.Spent.on.Site, )) + 
  geom_boxplot() +
  labs(y = "Daily.Time.Spent.on.Site")

ggplot(data, aes( y = Daily.Internet.Usage, )) + 
  geom_boxplot() +
  labs(y = "Daily.Internet.Usage")

ggplot(data, aes( y = Area.Income, )) + 
  geom_boxplot() +
  labs(y = "Area.Income")

b)

data
ggplot(data=data, aes(x=Male, y=Clicked.on.Ad)) +
  geom_bar(stat="identity")

ggplot(data=data, aes(x=Age, y=Clicked.on.Ad)) +
  geom_bar(stat="identity")

c)

ggplot(data, aes( x= factor(Clicked.on.Ad),y = Age, )) + 
  geom_boxplot() +
  labs(y = "Age")

ggplot(data, aes( x= factor(Clicked.on.Ad),y = Area.Income, )) + 
  geom_boxplot() +
  labs(y = "Area.Income")

ggplot(data, aes( x= factor(Clicked.on.Ad),y = Daily.Internet.Usage, )) + 
  geom_boxplot() +
  labs(y = "Daily.Internet.Usage")

ggplot(data, aes( x= factor(Clicked.on.Ad),y = Daily.Time.Spent.on.Site, )) + 
  geom_boxplot() +
  labs(y = "Daily.Time.Spent.on.Site")

d)

Based on our preliminary boxplots, I would expect an older person to be more likely to click on the ad than someone younger.

Q.6

ggplot(data, aes(x=Age, y=Area.Income, shape=factor(Clicked.on.Ad), color=factor(Clicked.on.Ad))) +
  geom_point()

Based on this plot I would not expect a 32 year old making an income of $62,000 to click on the add, because it seems like clicks have a positive correlation with age and negative correlation with income. The majority of clicks happer at ages > 32 and incomes < $60,000

ggplot(data, aes(x=Age, y=Daily.Time.Spent.on.Site, shape=factor(Clicked.on.Ad), color=factor(Clicked.on.Ad))) +
  geom_point()

Based on this plot, I would not expect a 50-year-old person who spends 60 minutes daily on the site to click on the ad because there seems to be a negative correlation between time spent on the site and clicks. People that spend more time on the site are less likely to click on the ad.

Q.7

data %>%
mutate_if(is.numeric,as.numeric)%>%
binarize() %>%
correlate(Clicked.on.Ad__1) %>%
plot_correlation_funnel(interactive = TRUE, alpha = 0.7)

b)

model = glm(Clicked.on.Ad~Daily.Internet.Usage + Daily.Time.Spent.on.Site + Age + Area.Income,data=data,family=binomial())
summary(model)

Call:
glm(formula = Clicked.on.Ad ~ Daily.Internet.Usage + Daily.Time.Spent.on.Site + 
    Age + Area.Income, family = binomial(), data = data)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.4578  -0.1341  -0.0333   0.0167   3.1961  

Coefficients:
                           Estimate Std. Error z value Pr(>|z|)    
(Intercept)               2.713e+01  2.714e+00   9.995  < 2e-16 ***
Daily.Internet.Usage     -6.391e-02  6.745e-03  -9.475  < 2e-16 ***
Daily.Time.Spent.on.Site -1.919e-01  2.066e-02  -9.291  < 2e-16 ***
Age                       1.709e-01  2.568e-02   6.655 2.83e-11 ***
Area.Income              -1.354e-04  1.868e-05  -7.247 4.25e-13 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1386.3  on 999  degrees of freedom
Residual deviance:  182.9  on 995  degrees of freedom
AIC: 192.9

Number of Fisher Scoring iterations: 8

Q.8

data$predict = predict(model, data, type="response")>=.8
data$test = data$Clicked.on.Ad==1
confusionMatrix(factor(data$predict), factor(data$test))
Confusion Matrix and Statistics

          Reference
Prediction FALSE TRUE
     FALSE   497   36
     TRUE      3  464
                                          
               Accuracy : 0.961           
                 95% CI : (0.9471, 0.9721)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.922           
                                          
 Mcnemar's Test P-Value : 2.99e-07        
                                          
            Sensitivity : 0.9940          
            Specificity : 0.9280          
         Pos Pred Value : 0.9325          
         Neg Pred Value : 0.9936          
             Prevalence : 0.5000          
         Detection Rate : 0.4970          
   Detection Prevalence : 0.5330          
      Balanced Accuracy : 0.9610          
                                          
       'Positive' Class : FALSE           
                                          
LS0tCnRpdGxlOiAiSFcgMyIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyfQphZCA9IHJlYWQuY3N2KCdBZHZlcnRpc2luZ19VcGRhdGVkLmNzdicpCgpsbSA8LSBsbShTYWxlc34uLCBkYXRhPWFkKQpuZXcuZGF0IDwtIGRhdGEuZnJhbWUoVFY9MjAwLCBSYWRpbz0xMCwgTmV3c3BhcGVyPTIwKQoKcHJlZGljdChsbSwgbmV3ZGF0YSA9IG5ldy5kYXQsIGludGVydmFsID0gImNvbmZpZGVuY2UiKQpgYGAKIyBRLjEKCmBgYHtyfQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KG1hZ3JpdHRyKQpkYXRhID0gcmVhZC5jc3YoJ0tBRy5jc3YnLCBzdHJpbmdzQXNGYWN0b3JzID0gRkFMU0UpCgpoZWFkKGRhdGEpCgoKCmdyb3VwX2J5KGRhdGEsIGFkX2lkKSAlPiUgCiAgc3VtbWFyaXNlKC4sIGNwYyA9IHN1bShDUEMpLCBpbXAgPSBzdW0oSW1wcmVzc2lvbnMpKSAlPiUgCiAgZmlsdGVyKGNwYyA8IDEpICU+JSAKICBhcnJhbmdlKC4sIGNwYywgZGVzYyhpbXApKQoKCgpgYGAKVGhlIGFkIGFtb25nIGFkcyB3aXRoIGNwYyA9IDAgdGhhdCBsZWQgdG8gdGhlIG1vc3QgaW1wcmVzc2lvbnMgaXMgMTEyMTA5NC4KCiMgUS4yCmBgYHtyfQpncm91cF9ieShkYXRhLCBjYW1wYWlnbl9pZCkgJT4lIAogIHN1bW1hcmlzZSguLCBzcGVudCA9IHN1bShTcGVudCksIGltcCA9IHN1bShJbXByZXNzaW9ucyksICkgJT4lIAogIG11dGF0ZSguLCBjcG09c3BlbnQvaW1wKSAlPiUKICBhcnJhbmdlKC4sIGNwbSkKYGBgCgpDYW1wYWlnbiAxMTc4IHdhcyB0aGUgbGVhc3QgZWZmaWNpZW50IG9uIGJyYW5kIGF3YXJlbmVzcyBvbiBhdmVyYWdlLgoKIyBRLjMKCmBgYHtyfQoKc3VtbWFyaXNlKGRhdGEsIHNwZW50ID0gc3VtKFNwZW50KSwgdGMgPSA1KnN1bShDb3N0UGVyQ29udl9Ub3RhbCksIGFjID0gNTAqc3VtKENvc3RQZXJDb252X0FwcHJvdmVkKSwgKSAlPiUgCiAgbXV0YXRlKC4sIHJvYXM9KHRjK2FjKS9zcGVudCkgJT4lCiAgYXJyYW5nZSguLCByb2FzKQpgYGAKUk9BUyBpcyAzNC4zMi4KCgpgYGB7cn0KbGlicmFyeShnZ3Bsb3QyKQoKZCA9IGZpbHRlcihkYXRhLCBpbnRlcmVzdD09MTUgfCBpbnRlcmVzdD09MjEgfCBpbnRlcmVzdD09MTAxKSAlPiUKICBtdXRhdGUoLiwgaW50ZXJlc3Q9ZmFjdG9yKGludGVyZXN0KSkgJT4lCiAgZ3JvdXBfYnkoLiwgaW50ZXJlc3QsIGdlbmRlcikgJT4lIAogIG11dGF0ZSguLCByb2FzPShDb3N0UGVyQ29udl9Ub3RhbCtDb3N0UGVyQ29udl9BcHByb3ZlZCkvU3BlbnQpICU+JQogIGZpbHRlcighaXMubmEocm9hcykpICU+JQogIGFycmFuZ2UoLiwgcm9hcykKCmdncGxvdChkLCBhZXMoeCA9IGludGVyZXN0LCB5ID0gcm9hcywgZ3JvdXAgPSBpbnRlcmVzdCkpICsgCiAgZ2VvbV9ib3hwbG90KCkgKwogIGxhYnMoeD0iSW50ZXJlc3QgSUQiLCB5ID0gIlJPQVMiKQpgYGAKCmBgYHtyfQpmaWx0ZXIoZGF0YSwgY2FtcGFpZ25faWQgPT0gMTE3OCkgJT4lCiAgbXV0YXRlKC4sIHJvYXM9KENvc3RQZXJDb252X1RvdGFsK0Nvc3RQZXJDb252X0FwcHJvdmVkKS9TcGVudCwgZ2VuZGVyPWZhY3RvcihnZW5kZXIpKSAlPiUKICBmaWx0ZXIoIWlzLm5hKHJvYXMpKSAlPiUKICBncm91cF9ieSguLCBnZW5kZXIpICU+JSAKICBzdW1tYXJpc2UoLiwgbW4gPSBtZWFuKHJvYXMpLCBtZCA9IG1lZGlhbihyb2FzKSkgCiAgCmBgYAoKIyBRLjUKYGBge3J9CmxpYnJhcnkocmVhZHIpCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGNvcnJlbGF0aW9uZnVubmVsKQpsaWJyYXJ5KERhdGFFeHBsb3JlcikKbGlicmFyeShXVlBsb3RzKQpsaWJyYXJ5KGdndGhlbWVzKSAKbGlicmFyeShST0NSKQpsaWJyYXJ5KGNhcmV0KQpsaWJyYXJ5KGNvcnJwbG90KQoKCgoKYGBgCgojIyBhKQoKYGBge3J9CmRhdGEgPSByZWFkLmNzdignYWR2ZXJ0aXNpbmcxLmNzdicsIHN0cmluZ3NBc0ZhY3RvcnMgPSBUUlVFKQoKaGVhZChkYXRhKQoKCgpnZ3Bsb3QoZGF0YSwgYWVzKCB5ID0gRGFpbHkuVGltZS5TcGVudC5vbi5TaXRlLCApKSArIAogIGdlb21fYm94cGxvdCgpICsKICBsYWJzKHkgPSAiRGFpbHkuVGltZS5TcGVudC5vbi5TaXRlIikKCmdncGxvdChkYXRhLCBhZXMoIHkgPSBEYWlseS5JbnRlcm5ldC5Vc2FnZSwgKSkgKyAKICBnZW9tX2JveHBsb3QoKSArCiAgbGFicyh5ID0gIkRhaWx5LkludGVybmV0LlVzYWdlIikKCmdncGxvdChkYXRhLCBhZXMoIHkgPSBBcmVhLkluY29tZSwgKSkgKyAKICBnZW9tX2JveHBsb3QoKSArCiAgbGFicyh5ID0gIkFyZWEuSW5jb21lIikKCmBgYAojIyBiKQoKYGBge3J9CmRhdGEKCmdncGxvdChkYXRhPWRhdGEsIGFlcyh4PU1hbGUsIHk9Q2xpY2tlZC5vbi5BZCkpICsKICBnZW9tX2JhcihzdGF0PSJpZGVudGl0eSIpCgpnZ3Bsb3QoZGF0YT1kYXRhLCBhZXMoeD1BZ2UsIHk9Q2xpY2tlZC5vbi5BZCkpICsKICBnZW9tX2JhcihzdGF0PSJpZGVudGl0eSIpCmBgYAoKIyMgYykKCmBgYHtyfQpnZ3Bsb3QoZGF0YSwgYWVzKCB4PSBmYWN0b3IoQ2xpY2tlZC5vbi5BZCkseSA9IEFnZSwgKSkgKyAKICBnZW9tX2JveHBsb3QoKSArCiAgbGFicyh5ID0gIkFnZSIpCgoKZ2dwbG90KGRhdGEsIGFlcyggeD0gZmFjdG9yKENsaWNrZWQub24uQWQpLHkgPSBBcmVhLkluY29tZSwgKSkgKyAKICBnZW9tX2JveHBsb3QoKSArCiAgbGFicyh5ID0gIkFyZWEuSW5jb21lIikKCgpnZ3Bsb3QoZGF0YSwgYWVzKCB4PSBmYWN0b3IoQ2xpY2tlZC5vbi5BZCkseSA9IERhaWx5LkludGVybmV0LlVzYWdlLCApKSArIAogIGdlb21fYm94cGxvdCgpICsKICBsYWJzKHkgPSAiRGFpbHkuSW50ZXJuZXQuVXNhZ2UiKQoKCmdncGxvdChkYXRhLCBhZXMoIHg9IGZhY3RvcihDbGlja2VkLm9uLkFkKSx5ID0gRGFpbHkuVGltZS5TcGVudC5vbi5TaXRlLCApKSArIAogIGdlb21fYm94cGxvdCgpICsKICBsYWJzKHkgPSAiRGFpbHkuVGltZS5TcGVudC5vbi5TaXRlIikKYGBgCiMjIGQpICAKCkJhc2VkIG9uIG91ciBwcmVsaW1pbmFyeSBib3hwbG90cywgSSB3b3VsZCBleHBlY3QgYW4gb2xkZXIgcGVyc29uIHRvIGJlIG1vcmUgbGlrZWx5IHRvIGNsaWNrIG9uIHRoZSBhZCB0aGFuIHNvbWVvbmUgeW91bmdlci4KCgoKIyBRLjYKCgpgYGB7cn0KZ2dwbG90KGRhdGEsIGFlcyh4PUFnZSwgeT1BcmVhLkluY29tZSwgc2hhcGU9ZmFjdG9yKENsaWNrZWQub24uQWQpLCBjb2xvcj1mYWN0b3IoQ2xpY2tlZC5vbi5BZCkpKSArCiAgZ2VvbV9wb2ludCgpCmBgYAoKCgpCYXNlZCBvbiB0aGlzIHBsb3QgSSB3b3VsZCBub3QgZXhwZWN0IGEgMzIgeWVhciBvbGQgbWFraW5nIGFuIGluY29tZSBvZiAkNjIsMDAwIHRvIGNsaWNrIG9uIHRoZSBhZGQsIGJlY2F1c2UgaXQgc2VlbXMgbGlrZSBjbGlja3MgaGF2ZSBhIHBvc2l0aXZlIGNvcnJlbGF0aW9uIHdpdGggYWdlIGFuZCBuZWdhdGl2ZSBjb3JyZWxhdGlvbiB3aXRoIGluY29tZS4gIFRoZSBtYWpvcml0eSBvZiBjbGlja3MgaGFwcGVyIGF0IGFnZXMgPiAzMiBhbmQgaW5jb21lcyA8ICQ2MCwwMDAKCmBgYHtyfQpnZ3Bsb3QoZGF0YSwgYWVzKHg9QWdlLCB5PURhaWx5LlRpbWUuU3BlbnQub24uU2l0ZSwgc2hhcGU9ZmFjdG9yKENsaWNrZWQub24uQWQpLCBjb2xvcj1mYWN0b3IoQ2xpY2tlZC5vbi5BZCkpKSArCiAgZ2VvbV9wb2ludCgpCmBgYAoKQmFzZWQgb24gdGhpcyBwbG90LCBJIHdvdWxkIG5vdCBleHBlY3QgYSA1MC15ZWFyLW9sZCBwZXJzb24gd2hvIHNwZW5kcyA2MCBtaW51dGVzIGRhaWx5IG9uIHRoZSBzaXRlIHRvIGNsaWNrIG9uIHRoZSBhZCBiZWNhdXNlIHRoZXJlIHNlZW1zIHRvIGJlIGEgbmVnYXRpdmUgY29ycmVsYXRpb24gYmV0d2VlbiB0aW1lIHNwZW50IG9uIHRoZSBzaXRlIGFuZCBjbGlja3MuICBQZW9wbGUgdGhhdCBzcGVuZCBtb3JlIHRpbWUgb24gdGhlIHNpdGUgYXJlIGxlc3MgbGlrZWx5IHRvIGNsaWNrIG9uIHRoZSBhZC4KCiMjIFEuNwpgYGB7cn0KZGF0YSAlPiUKbXV0YXRlX2lmKGlzLm51bWVyaWMsYXMubnVtZXJpYyklPiUKYmluYXJpemUoKSAlPiUKY29ycmVsYXRlKENsaWNrZWQub24uQWRfXzEpICU+JQpwbG90X2NvcnJlbGF0aW9uX2Z1bm5lbChpbnRlcmFjdGl2ZSA9IFRSVUUsIGFscGhhID0gMC43KQpgYGAKCiMjIyBiKQpgYGB7cn0KbW9kZWwgPSBnbG0oQ2xpY2tlZC5vbi5BZH5EYWlseS5JbnRlcm5ldC5Vc2FnZSArIERhaWx5LlRpbWUuU3BlbnQub24uU2l0ZSArIEFnZSArIEFyZWEuSW5jb21lLGRhdGE9ZGF0YSxmYW1pbHk9Ymlub21pYWwoKSkKc3VtbWFyeShtb2RlbCkKYGBgCiMjIFEuOApgYGB7cn0KCmRhdGEkcHJlZGljdCA9IHByZWRpY3QobW9kZWwsIGRhdGEsIHR5cGU9InJlc3BvbnNlIik+PS44CmRhdGEkdGVzdCA9IGRhdGEkQ2xpY2tlZC5vbi5BZD09MQpjb25mdXNpb25NYXRyaXgoZmFjdG9yKGRhdGEkcHJlZGljdCksIGZhY3RvcihkYXRhJHRlc3QpKQoKCmBgYAo=